rm(list=ls())

library(xts)
library(glmnet)
library(xlsx)
library(dplyr)
library(tidyr)
library(RColorBrewer)
library(devtools)
library(glmnetUtils)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

get_alpha <- function(fit) {
  alpha <- fit$alpha
  error <- sapply(fit$modlist, function(mod) {min(mod$cvm)})
  alpha[which.min(error)]
}

# Get all parameters.
get_model_params <- function(fit) {
  alpha <- fit$alpha
  lambdaMin <- sapply(fit$modlist, `[[`, "lambda.min")
  lambdaSE <- sapply(fit$modlist, `[[`, "lambda.1se")
  error <- sapply(fit$modlist, function(mod) {min(mod$cvm)})
  best <- which.min(error)
  data.frame(alpha = alpha[best], lambdaMin = lambdaMin[best],
             lambdaSE = lambdaSE[best], eror = error[best])
}



#load("0912_Download_ALLData.rda")
load("incident_hosp_cases.rda")


#vaccination
vaccination <- read.csv('COVID-19_Vaccinations_in_the_United_States_County.csv', check.names=FALSE,as.is=TRUE)
vaccin_selected<- cbind(vaccination[1],vaccination[5], vaccination[17])
colnames(vaccin_selected)[2]<- 'State'
vaccin_selected<-aggregate(vaccin_selected[3], by=list(Category=vaccin_selected$State, vaccin_selected$Date), FUN=max)
colnames(vaccin_selected)[2] <- 'Date'
vaccin_selected$Date<- anytime::anydate(vaccin_selected$Date)
colnames(vaccin_selected)[1]<- 'State'
vaccin_all<-vaccin_selected%>%group_by(State)%>%spread(State,Series_Complete_Pop_Pct)
vaccin_all<-xts(vaccin_all[,-1],order.by = vaccin_all$Date)
storage.mode(vaccin_all)<-'numeric'
vaccin_all<-100-vaccin_all
vaccin_append<-xts(matrix(100,nrow = length(index(national_hosp)),ncol = length(colnames(vaccin_all))),order.by = index(national_hosp))
colnames(vaccin_append)<-colnames(vaccin_all)
vaccin_append<-vaccin_append[index(vaccin_append)<index(vaccin_all)[1]]
vaccin_all<-rbind(vaccin_all,vaccin_append)
national_vaccin<-xts(rowSums(vaccin_all,na.rm = TRUE),order.by =index(vaccin_all) )
colnames(national_vaccin)<-"Vaccination"
# weekday indicator
Days_of_Week = matrix(0,dim(national_hosp)[1],7)
Days_of_Week = xts(Days_of_Week, index(national_hosp))
colnames(Days_of_Week) = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
for (i in 1:7){
  Days_of_Week[which(weekdays(index(Days_of_Week)) == colnames(Days_of_Week)[i]),i] = 1
}


load("gt_data_new.rda")
#terms_important<-names(MedRaw_optLag)[1:11]
# terms_important<-c("how.long.contagious",
#                    "loss.of.smell","loss.of.taste","X.covid.19.vaccine.","X.Cough." ,"pneumonia",
#                    "how.long.covid.19","sinus","symptoms.of.the.covid.19", "contagious.coronavirus",
#                    "X.coronavirus.vaccine.","the.covid.19","bronchitis","X.Headache.","rapid.covid.19",
#                    "robitussin","contagious.covid.19","X.Nasal.congestion.","covid.19.test")# 19 terms
#terms_important<-c("how.long.contagious","loss.of.smell","loss.of.taste","how.long.covid.19","symptoms.of.the.covid.19")
terms_important<-c("how.long.contagious",
                   "loss.of.smell","loss.of.taste","X.covid.19.vaccine.","X.Cough." ,"pneumonia",
                   "how.long.covid.19","sinus",
                   "symptoms.of.the.covid.19", "contagious.coronavirus","X.coronavirus.vaccine.")#11 terms, cor>0.6

n_train <- 56 
n_train_minimum <- 40
list_results_step1.State <- list()
coef_lasso_all <- list()
coef_smooth_step1.State <- list()
coef_all_AR7_LR<-list()
coef_all_AR7_ARIMA<-list()
coef_ridge_all<-list()
coef_net_all<-list()
alphas_cva_all<-list()
ts_max_lag <- 1:7  #lag a week
case_lag <- c(7,28) #a week and two weeks
vacc_lag <- c(7)
#gt_lag = case_lag
#mobility_lag <- c(14,21,28,35)
decay <- 0.8
pred_week = 1:14
region="US"
pred_target = national_hosp
alphas<-c(0,0.5,1)# searching through lasso, ridge and elastic net
nfolds<-10
temp_terms = terms_important

#selected gt terms
gt_nat <- stats::lag(rollmean(gtdataALL_IQR_noMedia[['US']],7),3)
gt_lag <- MedRaw_optLag[temp_terms]

state_cases<-national_cases
vaccin_state<-national_vaccin
if(TRUE){
#plot_dates<-seq(as.Date("2020-08-01"),as.Date("2021-12-31"),by=7)
visual_data<-na.omit(cbind(apply.weekly(national_hosp,sum),
                           apply.weekly(gtdataALL_IQR_noMedia[['US']][,"how.long.contagious"],sum)))
visual_data<-visual_data[index(visual_data)<as.Date("2021-12-31")]
visual_data<-visual_data[index(visual_data)>as.Date("2020-08-01")]
visual_data<-visual_data/7
visual_data[,2]<-visual_data[,2]*(mean(visual_data[,1])/mean(visual_data[,2]))
#plot(visual_data,main = "National Google Search Frequency of Term how.long.contagious")
library(ggplot2)
colnames(visual_data)<-c("National COVID-19 New Hospital Admissions","Search Frequency of 'how long contagious' ")

temp_matrix = matrix(NA,length(visual_data),3)
temp_matrix[,3] = as.numeric(matrix(visual_data,length(visual_data),1))
temp_matrix[,1] = rep(as.character(index(visual_data)), dim(visual_data)[2])
temp_matrix[,2] = rep(colnames(visual_data),each=dim(visual_data)[1])
temp_matrix = data.frame(temp_matrix)
temp_matrix[,3] = as.numeric(as.character(temp_matrix[,3]))
colnames(temp_matrix) = c("Week", "Data",  "Values")
temp_matrix[,1] = as.Date(temp_matrix[,1])
# Linechart
Nat_LineChart = ggplot(data=temp_matrix, aes(x=Week, y=Values, group=Data, color=Data)) + 
  geom_line(aes(size = Data,linetype=Data)) +
  scale_size_manual(values = c(.6,.6)  ) +
  scale_color_manual(values=c("red","blue"))+ 
  scale_linetype_manual(values= c(1,1))  + theme_minimal()+
  theme(text = element_text(size=10), axis.text.x = element_text(angle=0, hjust=1))  + 
  theme(legend.position = "bottom",legend.text = element_text(size=10),
        legend.key.width= unit(0.8, 'cm'),legend.title = element_text(size=10))+
  scale_x_date(breaks = '1 month',date_labels =  "%b %Y",guide = guide_axis(check.overlap = TRUE))+
  scale_y_continuous("Hospitalizations",sec.axis = sec_axis(~./mean(visual_data[,1])/mean(visual_data[,2]), name="National Search Frequency"))
png(file="test.png",width=18,height=15,units="cm",res=500)
print(plot(Nat_LineChart))
dev.off()
}
#visualization of gt and hosp
if(FALSE){
  pdf("gt_hosp.pdf")
  gt_selected <- lapply(temp_terms,function(i){gt_nat[,i]})
  gt_selected = do.call(cbind,gt_selected)
  brewer<-brewer.pal(3, "Dark2")
  
  for(i in 1:ncol(gt_selected)){
    ratio=mean(gt_selected[,i], na.rm = TRUE)/mean(national_hosp, na.rm = TRUE)
    curr_gt_hosp<-cbind(rollmean(gt_selected[,i],7)/ratio,national_hosp)
    colnames(curr_gt_hosp)[1]<-paste(colnames(curr_gt_hosp)[1],"_smoothed")
    
    plot(curr_gt_hosp,main=paste("national_hosp and ",colnames(curr_gt_hosp)[1]),col=brewer)
    print(addLegend("topleft",legend.names =colnames(curr_gt_hosp),col=brewer,
                    lty=c(1,1),
                    lwd=c(2,2),
                    ncol=2,
                    bg="white"))
  }
  dev.off()
}

for(n_forward in pred_week){
  
  #x_gt <- cbind(
  #stats::lag(gt_nat, pmax(gt_lag, n_forward))  
  #,stats::lag(gt_nat, n_forward)  
  #)
  x_ar <-stats::lag( pred_target,ts_max_lag +n_forward-1)
  x_cases <- stats::lag(state_cases, pmax(case_lag, n_forward))
  x_vacc<-stats::lag(vaccin_state,pmax(vacc_lag,n_forward))
  x_gt <- lapply(temp_terms,function(i){stats::lag(gt_nat[,i],pmax(gt_lag[i],n_forward) )}) 
  x_gt = do.call(cbind,x_gt)
  #x_mobility = stats::lag(gt_mobility, pmax(mobility_lag, n_forward))
  x_curr <- na.omit(merge(x_ar,x_cases, x_vacc,x_gt ,Days_of_Week)) 
  #exclude google search terms
  x_curr2 <- na.omit(merge(x_ar, x_cases, x_vacc ,Days_of_Week)) 
  
  #x_curr <- na.omit(merge(x_ar, x_cases, x_gt , x_mobility,  Days_of_Week)) 
  common_id_curr <- index(na.omit(merge(x_curr, pred_target, all = F)))  #get the lagged data's date, the dates that we will train and test for (dont have NA)
  
  pred <-  pred_target[common_id_curr] #predict the dates that have lagged data (omit first two weeks of data)
  pred[] <- NA
  pred_ARIMA700<-pred
  pred_ARIMA710<-pred
  pred_AR7_LR<-pred
  pred2<-pred
  pred_ridge<-pred
  pred2_ridge<-pred
  pred_smooth <-pred_target[common_id_curr]  
  pred_smooth[] <- NA
  pred_elasnet_full<-pred
  pred_elasnet_full_no_gt<-pred
  pred_cva_full<-pred
  pred_cva_full_no_gt<-pred
  alphas_cva<-pred
  coef <- matrix(nrow=length(common_id_curr), ncol = ncol(x_curr) + 1)
  coef <- xts(coef, order.by = common_id_curr)
  colnames(coef) <- c("intercept",colnames(x_curr))
  coef_ridge<-coef
  coef_net<-coef
  coef_ar7_lr<-matrix(nrow=length(common_id_curr), ncol = ncol(x_ar) + 1)
  coef_ar7_lr<-xts(coef_ar7_lr,order.by = common_id_curr)
  colnames(coef_ar7_lr)<-c("intercept",colnames(x_ar))
  
  coef_ar7_arima<-coef_ar7_lr
  colnames(coef_ar7_arima)<-c(colnames(x_ar),"intercept")
  
  
  
  
  coef_smooth <- matrix(nrow=length(common_id_curr), ncol = ncol(x_curr) + 1)
  coef_smooth <- xts(coef_smooth, order.by = common_id_curr)
  colnames(coef_smooth) <- c("intercept",colnames(x_curr))
  
  x_gt_new <- lapply(temp_terms,function(i){stats::lag(gt_nat[,i],pmax(gt_lag[i]-n_forward,0) )})
  x_gt_new = do.call(cbind,x_gt_new)
  x_new <- cbind(
    stats::lag(pred_target, ts_max_lag - 1),
    stats::lag(state_cases, pmax(case_lag - n_forward, 0)), 
    stats::lag(vaccin_state,pmax(vacc_lag-n_forward,0)),
    x_gt_new,
    stats::lag(Days_of_Week,(7-n_forward%%7))
  )  
  #exclude gt
  x_new2 <- cbind(
    stats::lag(pred_target, ts_max_lag - 1),  
    stats::lag(state_cases, pmax(case_lag - n_forward, 0)), 
    stats::lag(vaccin_state,pmax(vacc_lag-n_forward,0)),
    # x_gt_new,
    stats::lag(Days_of_Week,(7-n_forward%%7))
  )  
  for(date_cur in common_id_curr){
    
    
    train_id <- as.Date(date_cur) - ((n_train-1):0) #set training set, use up to 90 days of data to train parameters, when predicting each day use lagged death, cases, wordfreq and mobility
    train_id <- train_id[train_id >= common_id_curr[1]] #keep only dates past the first day of lagged data, 2020-02-04 (since before that no explanatory variables)
    if(length(train_id) >= n_train_minimum){ 
      if (length(which(as.matrix(pred_target[train_id])==0))>(n_train_minimum-5)){ #if too many zeros then perdict death is 0
        coef[as.Date(date_cur),] = 0 
        pred[as.Date(date_cur)] =pred_target[as.Date(date_cur)]
      }else{ #start training only when having at least n_train_minimum days of training data to tain (not considering lagged dates)
        #lasso full model
        x_lasso = x_curr[train_id]
        y_lasso = pred_target[index(x_lasso)]
        train_weights <- (decay)^(length(y_lasso):1)
        set.seed(100)
        lasso.fit <- glmnet::cv.glmnet(x = as.matrix(x_lasso), 
                                       y = as.matrix(y_lasso), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                       alpha = 1, weights = train_weights)
        lam.s <- lasso.fit$lambda.1se #select the lambda that gives the simpliest model within one standard error of optimal val of lambda
        coef[as.Date(date_cur),] <- as.numeric(coef(lasso.fit, lambda = lam.s)) #store the coef for the simpliest lambda in the training date's row (if common_id_curr[40] then stored in row 2020-02-04+39=2020-03-14) for all explanatory variables 
        pred[as.Date(date_cur)] <- predict(lasso.fit, newx = as.matrix(x_new[as.Date(date_cur)]), s = lam.s) #store prediction for that date ???predict tomorrow???
        
        # ridge
        ridge.fit <- glmnet::cv.glmnet(x = as.matrix(x_lasso), 
                                       y = as.matrix(y_lasso), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                       alpha = 0, weights = train_weights)
        lam.s_ridge <- ridge.fit$lambda.1se #select the lambda that gives the simpliest model within one standard error of optimal val of lambda
        pred_ridge[as.Date(date_cur)] <- predict(ridge.fit, newx = as.matrix(x_new[as.Date(date_cur)]), s = lam.s_ridge) #store prediction for that date ???predict tomorrow???
        coef_ridge[as.Date(date_cur),] <- as.numeric(coef(ridge.fit, lambda = lam.s_ridge))
        #elastic net
        net.fit<-glmnet::cv.glmnet(x = as.matrix(x_lasso), 
                                   y = as.matrix(y_lasso), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                   alpha = 0.5, weights = train_weights)
        lam.s<-net.fit$lambda.1se
        pred_elasnet_full[as.Date(date_cur)] <- predict(net.fit, newx = as.matrix(x_new[as.Date(date_cur)]), s = lam.s)
        coef_net[as.Date(date_cur),] <- as.numeric(coef(net.fit, lambda = lam.s))
        #cv alphas
        
        cva.fit<-cva.glmnet(
          as.matrix(x_lasso),
          as.matrix(y_lasso),
          alpha = alphas,
          nfolds = 10,
          foldid = sample(rep(seq_len(nfolds), length = nrow(x_lasso))),
          outerParallel = NULL,
          checkInnerParallel = TRUE,
          weights=train_weights,
          grouped = FALSE
        )
        
        pred_cva_full[as.Date(date_cur)]<- predict(
          cva.fit,
          newx=as.matrix(x_new[as.Date(date_cur)]),
          get_alpha(cva.fit),
          s="lambda.1se"
        )
        
        # no gt lasso
        x_lasso2<-x_curr2[train_id]
        y_lasso2<-pred_target[index(x_lasso2)]
        train_weights <- (decay)^(length(y_lasso2):1)
        lasso.fit2 <- glmnet::cv.glmnet(x = as.matrix(x_lasso2), 
                                        y = as.matrix(y_lasso2), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                        alpha = 1, weights = train_weights)
        lam.s2 <- lasso.fit$lambda.1se
        pred2[as.Date(date_cur)] <- predict(lasso.fit2, newx = as.matrix(x_new2[as.Date(date_cur)]), s = lam.s2)
        # no gt ridge
        ridge.fit2 <- glmnet::cv.glmnet(x = as.matrix(x_lasso2), 
                                        y = as.matrix(y_lasso2), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                        alpha = 0, weights = train_weights)
        lam.s2 <- ridge.fit2$lambda.1se
        pred2_ridge[as.Date(date_cur)] <- predict(ridge.fit2, newx = as.matrix(x_new2[as.Date(date_cur)]), s = lam.s2)
        # no gt elastic net
        net.fit2 <- glmnet::cv.glmnet(x = as.matrix(x_lasso2), 
                                        y = as.matrix(y_lasso2), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                        alpha = 0.5, weights = train_weights)
        lam.s2 <- net.fit2$lambda.1se
        pred_elasnet_full_no_gt[as.Date(date_cur)] <- predict(net.fit2, newx = as.matrix(x_new2[as.Date(date_cur)]), s = lam.s2)
        #cv alphas
        cva.fit<-cva.glmnet(
          as.matrix(x_lasso2),
          as.matrix(y_lasso2),
          alpha = alphas,
          nfolds = 10,
          foldid = sample(rep(seq_len(nfolds), length = nrow(x_lasso2))),
          outerParallel = NULL,
          checkInnerParallel = TRUE,
          weights=train_weights,
          grouped = FALSE
          
        )
        
        pred_cva_full_no_gt[as.Date(date_cur)]<- predict(
          cva.fit,
          newx=as.matrix(x_new2[as.Date(date_cur)]),
          get_alpha(cva.fit),
          s="lambda.1se"
        )
        alphas_cva[as.Date(date_cur)]<-get_alpha(cva.fit)
        # ARIMA AR700
        res<-try(AR7 <- arima(pred_target[train_id] , order = c(7,0,0)),silent = TRUE)
        if(typeof(res[1])=="list"){
          AR7 <- arima(pred_target[train_id] , order = c(7,0,0) )
          pred_ARIMA700[as.Date(date_cur)]<-predict(AR7 , n.ahead = n_forward)$pred[n_forward]
          coef_ar7_arima[as.Date(date_cur),]<-coef(AR7)
        }else{pred_ARIMA700[as.Date(date_cur)] =pred_target[as.Date(date_cur)]}
        # ARIMA AR710
        res<-try(AR7 <- arima(pred_target[train_id] , order = c(7,1,0)),silent = TRUE)
        if(typeof(res[1])=="list"){
          AR7 <- arima(pred_target[train_id] , order = c(7,1,0) )
          pred_ARIMA710[as.Date(date_cur)]<-predict(AR7 , n.ahead = n_forward)$pred[n_forward]
        }else{pred_ARIMA710[as.Date(date_cur)] =pred_target[as.Date(date_cur)]}
        #linear regression AR7
        ar7_lr<-lm(as.matrix(pred_target[train_id])~x_ar[train_id])
        pred_AR7_LR[as.Date(date_cur)]<-as.numeric(coef(ar7_lr)%*%t(cbind(1,x_ar[as.Date(date_cur)])))
        coef_ar7_lr[as.Date(date_cur),]<-coef(ar7_lr)
      }
      if (!is.na(as.numeric(coef[as.Date(date_cur),])) && !is.na(as.numeric(coef[(as.Date(date_cur)-1),])) && !is.na(as.numeric(coef[(as.Date(date_cur)-2),])) && !is.na(as.numeric(coef[(as.Date(date_cur)-3),])) && !is.na(as.numeric(coef[(as.Date(date_cur)-4),])) ){
        temp_coef =  (as.numeric(coef[as.Date(date_cur),])+  as.numeric(coef[(as.Date(date_cur)-1),])+  as.numeric(coef[(as.Date(date_cur)-2),]) + as.numeric(coef[(as.Date(date_cur)-3),]) + as.numeric(coef[(as.Date(date_cur)-4),]))/5
        coef_smooth[as.Date(date_cur),] = temp_coef
        pred_smooth[as.Date(date_cur)] = sum(temp_coef * c(1,as.numeric(x_new[as.Date(date_cur)])))
      }
    }
  }
  ##
  truth <- stats::lag(pred_target, -n_forward) #lag backwards (yesterday has today's data), truth is today predicting tomorrow and upto two weeks head (save to today's row now)
  naive <- pred_target #naive approach simply using the death count today (newest) to predict future (whether it is tomorrow or next week)
  naive_increment <- 2*pred_target-stats::lag(pred_target,n_forward) 
  naive_period <- stats::lag(pred_target , (7-n_forward%%7) )
  set.seed(100)
  argo_pre <- pred
  argo_presmooth <- round((pred_smooth))
  results <- merge(truth, naive, naive_increment, naive_period,pred, pred_ridge,pred_elasnet_full,pred_cva_full,pred2,pred2_ridge,pred_elasnet_full_no_gt,pred_cva_full_no_gt,pred_AR7_LR,pred_ARIMA700,pred_ARIMA710, all = F)
  colnames(results) <- c("truth", "naive", "naive_increment", "naive_period","full_model_lasso","full_model_ridge","full_model_elastic_net(0.5)","full_model_CV_alpha","full_model_no_gt_lasso","full_model_no_gt_ridge","full_model_no_gt_elastic_net","full_model_CV_no_gt_alpha","AR7_LinearRegression","AR7_ARIMA700","AR7_ARIMA710")
  list_results_step1.State[[region]][[n_forward]] <- results
  alphas_cva_all[[region]][[n_forward]]<-alphas_cva
  coef_lasso_all[[region]][[n_forward]] <- coef
  coef_smooth_step1.State[[region]][[n_forward]] <- coef_smooth
  coef_ridge_all[[region]][[n_forward]]<-coef_ridge
  coef_all_AR7_LR[[region]][[n_forward]]<-coef_ar7_lr
  coef_all_AR7_ARIMA[[region]][[n_forward]]<-coef_ar7_arima
  coef_net_all[[region]][[n_forward]]<-coef_net
}

list_results_nat_0901 = list()
pred_week = 1:7
for (i in names(list_results_step1.State)){
  list_results_nat_0901[["1 week ahead"]][[i]] = lapply(pred_week, function(x) list_results_step1.State[[i]][[x]])
  list_results_nat_0901[["1 week ahead"]][[i]]  = Reduce("+", list_results_nat_0901[["1 week ahead"]][[i]]) 
  #list_results_State_0328_2021Pred[["1 week ahead"]][[i]] = list_results_State_0328_2021Pred[["1 week ahead"]][[i]]["2021-01/",]
}
pred_week = 8:14
for (i in names(list_results_step1.State)){
  list_results_nat_0901[["2 week ahead"]][[i]] = lapply(pred_week, function(x) list_results_step1.State[[i]][[x]])
  list_results_nat_0901[["2 week ahead"]][[i]]= Reduce("+", list_results_nat_0901[["2 week ahead"]][[i]])
}
# pred_week =15:21
# for (i in names(list_results_step1.State)){
#   list_results_nat_0901[["3 week ahead"]][[i]] = lapply(pred_week, function(x) list_results_step1.State[[i]][[x]])
#   list_results_nat_0901[["3 week ahead"]][[i]]  = Reduce("+", list_results_nat_0901[["3 week ahead"]][[i]]) 
#   #list_results_State_0328_2021Pred[["1 week ahead"]][[i]] = list_results_State_0328_2021Pred[["1 week ahead"]][[i]]["2021-01/",]
# }
# pred_week = 22:28
# for (i in names(list_results_step1.State)){
#   list_results_nat_0901[["4 week ahead"]][[i]] = lapply(pred_week, function(x) list_results_step1.State[[i]][[x]])
#   list_results_nat_0901[["4 week ahead"]][[i]]= Reduce("+", list_results_nat_0901[["4 week ahead"]][[i]])
# }


# save(list_results_nat_0901,list_results_step1.State,coef_lasso_all,coef_ridge_all,
#      coef_all_AR7_LR,coef_net_all,
#      coef_all_AR7_ARIMA,alphas_cva_all,file = "national_prediction_hosp_admission_0117_5terms.rda")
if(TRUE){
  load("national_prediction_hosp_admission_0117.rda")
  national_predication<-na.omit(list_results_nat_0901[["1 week ahead"]][[1]])

dates<-seq(as.Date("2021-01-04"),as.Date("2022-01-10"),by=7)
national_predication<-national_predication[dates]
library(Metrics)
evaluation_matrix<-matrix(NA,nrow = length(colnames(national_predication))-1,ncol = 3)
rownames(evaluation_matrix)<-colnames(national_predication)[2:ncol(national_predication)]
colnames(evaluation_matrix)<-c("RMSE","Cor","MAE")
for (method in colnames(national_predication)[2:ncol(national_predication)]){
  curr_result<-national_predication[,method]
  evaluation_matrix[method,'RMSE']<-round(rmse(curr_result,national_predication[,1]),digits = 3)
  evaluation_matrix[method,'Cor']<-round(cor(curr_result,national_predication[,1]),digits = 4)
  evaluation_matrix[method,'MAE']<-round(mae(curr_result,national_predication[,1]),digits = 3)
}
evaluation_matrix<-evaluation_matrix[order(evaluation_matrix[,1]),]
#write.xlsx(evaluation_matrix,"national_hosp_prediciton_0117.xlsx",append = TRUE,sheetName = "one-week")
#evaluation_matrix<-evaluation_matrix[-c(6,10),]
#national_predication<-na.omit(list_results_nat_0901[["1 week ahead"]][[1]])
library(ggplot2)
visual_data<-national_predication[,c("truth","full_model_lasso","full_model_no_gt_lasso","AR7_ARIMA700","naive_increment","full_model_CV_alpha")]
colnames(visual_data)<-c("Truth","ARGO","ARGO No GT","AR7","naive_increment","ARGO_CVA")

temp_matrix = matrix(NA,length(visual_data),3)
temp_matrix[,3] = as.numeric(matrix(visual_data,length(visual_data),1))
temp_matrix[,1] = rep(as.character(index(visual_data)), dim(visual_data)[2])
temp_matrix[,2] = rep(colnames(visual_data),each=dim(visual_data)[1])
temp_matrix = data.frame(temp_matrix)
temp_matrix[,3] = as.numeric(as.character(temp_matrix[,3]))
colnames(temp_matrix) = c("Week", "Methods",  "Hospitalizations")
temp_matrix[,1] = as.Date(temp_matrix[,1])
# Linechart
Nat_LineChart = ggplot(data=temp_matrix, aes(x=Week, y=Hospitalizations, group=Methods, color=Methods)) + 
  geom_line(aes(size = Methods,linetype=Methods)) +scale_size_manual(values = c(0.4,.6,.4,.4,0.4,.6)  ) +
  scale_color_manual(values=c("gold3","red","green4","blue","cyan","black"))+ 
  scale_linetype_manual(values= c(1,1,1,1,1,1))  + theme_minimal()+
  theme(text = element_text(size=8), axis.text.x = element_text(angle=0, hjust=1))  + theme(legend.position = "bottom")
png(file="two-week-nat_new_terms.png",width=18,height=15,units="cm",res=500)
print(plot(Nat_LineChart))
dev.off()
}
# write alphas
if(FALSE){
  alphas_all<-NA
  for(i in 1:7){
    curr_alphas<-na.omit(alphas_cva_all[["US"]][[i]])
    colnames(curr_alphas)<-paste(i,"th day",sep = "")
    alphas_all<-cbind(alphas_all,curr_alphas)
  }
  alphas_all<-alphas_all[,-1]
  write.xlsx(alphas_all,file = "national_alphas.xlsx")
  
}

#calculate daily RMSE without weekly aggregation

if(FALSE){
  list_result_error<-list()
  pred_week = 1:14
  for(i in pred_week){
    curr_pred<-list_results_step1.State[[1]][[i]]
    for(j in colnames(curr_pred)){
      if(j!="truth"){
        curr_pred[,j]<-(curr_pred[,j]-curr_pred$truth)^2
      }
    }
    list_result_error[["US"]][[i]]<-curr_pred
  }
  list_result_error_sum<-list()
  pred_week = 1:7
  for (i in names(list_result_error)){
    list_result_error_sum[["1 week ahead"]][[i]] = lapply(pred_week, function(x) list_result_error[[i]][[x]])
    list_result_error_sum[["1 week ahead"]][[i]]  = Reduce("+", list_result_error_sum[["1 week ahead"]][[i]]) 
    list_result_error_sum[["1 week ahead"]][[i]]  = (list_result_error_sum[["1 week ahead"]][[i]]/7)^0.5
    one_week_pred<-list_result_error_sum[["1 week ahead"]][[i]]
    one_week_ahead_RMSE<-matrix(NA,nrow = ncol(one_week_pred)-1,ncol = 1)
    rownames(one_week_ahead_RMSE)<-colnames(one_week_pred)[2:length(colnames(one_week_pred))]
    for(j in rownames(one_week_ahead_RMSE)){
      one_week_ahead_RMSE[j,]<-mean(one_week_pred[dates,j])
    }
    colnames(one_week_ahead_RMSE)<-"Average RMSE"
    #list_results_State_0328_2021Pred[["1 week ahead"]][[i]] = list_results_State_0328_2021Pred[["1 week ahead"]][[i]]["2021-01/",]
  }
  pred_week = 8:14
  for (i in names(list_result_error)){
    list_result_error_sum[["2 week ahead"]][[i]] = lapply(pred_week, function(x) list_result_error[[i]][[x]])
    list_result_error_sum[["2 week ahead"]][[i]]= Reduce("+", list_result_error_sum[["2 week ahead"]][[i]])
    list_result_error_sum[["2 week ahead"]][[i]]  = (list_result_error_sum[["2 week ahead"]][[i]]/7)^0.5
    two_week_pred<-list_result_error_sum[["2 week ahead"]][[i]]
    two_week_ahead_RMSE<-matrix(NA,nrow = ncol(two_week_pred)-1,ncol = 1)
    rownames(two_week_ahead_RMSE)<-colnames(two_week_pred)[2:length(colnames(two_week_pred))]
    for(j in rownames(two_week_ahead_RMSE)){
      two_week_ahead_RMSE[j,]<-mean(two_week_pred[dates,j])
    }
    colnames(two_week_ahead_RMSE)<-"Average RMSE"
  }
  
  
}
